Clouds

In this file, the goal is to quantify the historical cloud cover in the planned flight boxes and to assess the implications for BioSCape operations.

To assess potential cloud cover during the BioSCape campaign, here we use cloud cover data from the MODIS aqua (https://developers.google.com/earth-engine/datasets/catalog/MODIS_061_MOD09GA) and terra (https://developers.google.com/earth-engine/datasets/catalog/MODIS_061_MOD09GA) daily surface reflectance products with a 1km resolution. The QA metadata for these products assigns each raster cell one of 4 cloud categories: Clear, Cloudy, Mixed, or “Not set, assumed clear”. Here, we consider any raster cell categorized as “cloudy” to be cloud covered and other categories to be cloud-free (Figure 1).

# Load required packages

# Load packages
  library(rgee)
  library(targets)
  library(sf)
  library(terra)
  library(raster)
  library(tidyverse)
  library(lubridate)
  library(leaflet)
  library(ggplot2)
  library(ggpubr)
  library(leafem)
  library(plotly)



#Load required data

  # get domain
    domain <- st_read("data/output/domain.gpkg",
                      quiet = TRUE)
    domain_sf <- domain
  
  # get flight boxes
    boxes <- st_read("data/flight_planning/20221026_flightboxes.gpkg",
                     quiet = TRUE)
    
    boxes$id <- 1:20 # need a unique ID to make things easier
    
    boxes$ordered_id[c(11,10,15,14,20,18,2,13,17,16,4,1,6,8,3,19,5,9,7,12)] <- 1:20
    
    boxes_sf <- boxes
    
  # Download table from drive (to see the code underlying this or to update the data, see the file "R/mock_flights_earth_engine.R")
    googledrive::drive_download(file = "EMMA/cloud_stats.csv",
                                path = "data/test_cloud_stats.csv",
                                overwrite = TRUE)
    
    cloud_table <- read.csv("data/test_cloud_stats.csv")

  # Parse date

    cloud_table %>%
      mutate(year = year(date),
             month = month(date),
             day = day(date),
             day_of_year = yday(date)) -> cloud_table
#Load in the correct projection (for some reason this is handled incorrectly otherwise)
  nasa_proj <- "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +R=6371007.181 +units=m +no_defs"

#Load the example layers
    
  list.files(path = "data/flight_planning/",
             pattern = "example_cloud_cover",
             full.names = TRUE) %>%
    rast() -> cloud_examples
  
# Generate the bounding box used
    domain_plus_boxes <-
    st_union(domain_sf,boxes_sf) %>%
      st_bbox() %>%
      st_as_sfc()


  crs(cloud_examples) <- nasa_proj
  
  cloud_examples %>%
    project(y = terra::crs(domain_sf,proj=TRUE)) %>%
    crop(y = vect(domain_plus_boxes)) -> cloud_examples

c1 <-  
cloud_examples[[1]] %>%
  as.data.frame(xy = TRUE) %>%
  mutate(clouds = as.logical(state_1km))%>%
ggplot() +
  geom_tile(aes(x = x,
                y = y,
                fill = clouds))+
  scale_fill_manual(values = c("light blue","white"))+
  geom_sf(data = domain_sf,fill=NA)+
  xlab(NULL)+
  ylab(NULL)+
  scale_x_continuous(expand = (c(0,0)))+
  scale_y_continuous(expand = c(0,0))

c2 <-  
cloud_examples[[2]] %>%
  as.data.frame(xy = TRUE) %>%
  mutate(clouds = as.logical(state_1km))%>%
ggplot() +
  geom_tile(aes(x = x,
                y = y,
                fill = clouds))+
  scale_fill_manual(values = c("light blue","white"))+
  geom_sf(data = domain_sf,fill=NA)+
  xlab(NULL)+
  ylab(NULL)+
  scale_x_continuous(expand = (c(0,0)))+
  scale_y_continuous(expand = c(0,0))

c3 <-  
cloud_examples[[3]] %>%
  as.data.frame(xy = TRUE) %>%
  mutate(clouds = as.logical(state_1km))%>%
ggplot() +
  geom_tile(aes(x = x,
                y = y,
                fill = clouds))+
  scale_fill_manual(values = c("light blue","white"))+
  geom_sf(data = domain_sf,fill=NA)+
  xlab(NULL)+
  ylab(NULL)+
  scale_x_continuous(expand = (c(0,0)))+
  scale_y_continuous(expand = c(0,0))

ggarrange(c1,c2,c3,
          common.legend = TRUE,
          ncol = 1)
Figure 1. Binary MODIS cloud data. Clouds are in white, non-clouds in blue.

Figure 1. Binary MODIS cloud data. Clouds are in white, non-clouds in blue.

Mean Cloud Cover

To visualize spatial patterns on cloud cover, we calculated the average cloud cover for each raster cell (Figure 2). Averages were taken across days (October-December) and years (2000-present).

#Pull in other bioscape layers

    cloud_table %>%
      na.omit()%>%
      filter(month != 9)%>%
      mutate(binary_clear = dplyr::if_else(mean <= .1,true = 1,false = 0)) %>%
      group_by(id)%>%
      summarize(prop_clear = sum(binary_clear)/n(),
                clear_days = sum(binary_clear),
                total_days = n())%>%
      mutate(prop_clear = round(prop_clear,2))%>%
      inner_join(x = boxes_sf)%>%
    st_transform(crs = st_crs(4326)) -> flights

  team_requests <- st_read("data/manual_downloads/BIOSCAPE_proposed/20221014_team_polygons.gpkg",quiet = TRUE) %>%
      st_transform(crs = st_crs(4326))

  domain_sf %>%
      st_transform(crs = st_crs(4326)) -> domain_wgs84
  
  mean_cloud_cover <- raster("data/output/mean_cloud_cover.tif")
  

#Make a palette
  pal <- colorNumeric(palette = colorRamp(c("white", "blue"), interpolate = "spline"),
                      domain = unique(flights$prop_clear))
  
  pal2 <- colorNumeric(palette = "Blues",
                      domain = 0:1, #unique(values(mean_cloud_cover)),
                      reverse = TRUE)



#Make labels
labels <- sprintf(as.character(flights$prop_clear)) %>%
  lapply(htmltools::HTML)
# 
# leaflet(data = flights) %>%
#   addProviderTiles("Esri.NatGeoWorldMap", group = "NatGeo") %>%
#   addProviderTiles(providers$Esri.WorldImagery, group = "World Imagery") %>%
#   addMapPane("flights", zIndex = 420) %>%
#   addMapPane("requests", zIndex = 410)%>% 
#      addPolygons(stroke = TRUE,
#               group = "Flights",
#               color = ~pal(prop_clear),
#               opacity = 1,
#               label = labels,
#               options = pathOptions(pane = "flights"))%>%
#   addPolygons(data = domain_wgs84,
#               stroke = TRUE,
#               color = "grey",
#               fill = FALSE,
#               weight = 3) %>%
#       addPolygons(data = team_requests%>%
#                 st_zm(drop = T, what = "ZM"),
#                   stroke = TRUE,
#                   color = "brown",
#                   group = "Requests",
#               options = pathOptions(pane = "requests"))%>%  
#     addMouseCoordinates() %>%
#     #addImageQuery(sampling_options_wgs84, type="mousemove", layerId = "park_name") %>%
#   leaflet::addLegend(position = "bottomright",
#             pal = pal,
#             values = unique(flights$prop_clear),
#             opacity = 1,
#             title = "Cluster") %>%
#     addLayersControl(
#     baseGroups = c("World Imagery","NatGeo"),
#     overlayGroups = c("Flights","Requests"),
#     options = layersControlOptions(collapsed = FALSE),
#     position = "topright")


flights %>%
  left_join(y = boxes_sf %>%
    st_drop_geometry())%>%
leaflet() %>%
  addProviderTiles("Esri.NatGeoWorldMap", group = "NatGeo") %>%
  addProviderTiles(providers$Esri.WorldImagery, group = "World Imagery") %>%
  addMapPane("flights", zIndex = 420) %>%
  addMapPane("requests", zIndex = 410) %>% 
  addRasterImage(x = mean_cloud_cover,
                 group = "Clouds",
                 colors = pal2,
                 opacity = 1) %>%
     addPolygons(stroke = TRUE,
              group = "Flights",
              color = "black",
              opacity = 1,
              weight = 1,
              label = ~as.character(ordered_id),
              labelOptions = labelOptions(noHide = T,
                                          textOnly = T,
                                          textsize = 3),
              options = pathOptions(pane = "flights"),
              fill = FALSE)%>%
      addPolygons(data = team_requests%>%
                st_zm(drop = T, what = "ZM"),
                  stroke = TRUE,
                  color = "black",
                  group = "Requests",
              options = pathOptions(pane = "requests"),
              fill = FALSE,
              weight = 1)%>%  
    addMouseCoordinates() %>%
    #addImageQuery(sampling_options_wgs84, type="mousemove", layerId = "park_name") %>%
  leaflet::addLegend(position = "bottomright",
            pal = pal2,
            values = 0:1,
            opacity = 1,
            title = "Mean\nCloud\nCover") %>%
    addLayersControl(
    baseGroups = c("World Imagery","NatGeo"),
    overlayGroups = c("Flights","Requests","Clouds"),
    options = layersControlOptions(collapsed = FALSE),
    position = "topright") %>%
  hideGroup("Requests")

Figure 2. Mean cloud cover. Whiter raster cells are cloudier, bluer cells are clearer. Boxes shown are flight boxes, labeled with a unique ID. Polygons are the requested sampling regions.

Cloud cover over time

To visualize temporal patterns in cloud cover, we calculated the mean cloud cover for each flight box (Figure 3).

  #ID by day of year
  
    cloud_table %>%
        mutate(date = as.Date(day_of_year,origin="2022-12-31"))%>%
        mutate(start_of_month = floor_date(date,unit = "month"),
               month_label = month(start_of_month,label = TRUE),
               julian_label = yday(start_of_month),
               day_of_month = mday(date),
               md_label = paste(month_label,"-",day_of_month,sep = "")) %>%
      group_by(id,day_of_year) %>%
      left_join(y = boxes_sf %>%
      st_drop_geometry()) -> temp
      

    { temp %>%
      ggplot() +
      geom_tile(mapping = aes(x = day_of_year,
                              y = ordered_id,
                              fill = mean))+
      scale_fill_gradient(low = "sky blue",
                          high = "white")+
      facet_wrap(~year)+
        scale_x_continuous(breaks = temp$julian_label,
                           labels = temp$month_label)+
      labs(fill="Mean\nCloud\nCover",
           y="Flight Box ID",
           x = "Day of Year")+
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))} %>%
  ggplotly()

Figure 3. Rows represent different flight boxes (see Fig 2.), columns different days.

Proportion of clear days for each flight box

Here, we define a “clear” day as one with a mean cloud cover of less than 10 percent.

    #prop clear days
    {cloud_table %>%
      na.omit()%>%
      filter(month != 9)%>%
      mutate(binary_clear = dplyr::if_else(mean <= .1,true = 1,false = 0)) %>%
      group_by(id)%>%
      summarize(prop_clear = sum(binary_clear)/n(),
                clear_days = sum(binary_clear),
                total_days = n())%>%
      inner_join(x = boxes_sf)%>%
      ggplot(mapping = aes(fill = prop_clear))+
      geom_sf()+
      geom_sf(data = domain,inherit.aes = FALSE,fill=NA)+
      scale_fill_gradient(low = "white",high = "sky blue",limits=c(0,1))+
      geom_sf_text(aes(label = round(prop_clear,digits = 2)))+
      labs(fill = "Prop.\nClear",
           x=NULL,
           y=NULL)} %>%
  ggplotly()

Figure 6. Proportion of clear days (mean cloud cover < 10% ) for each flight box. Values in boxes are the proportion of clear days for that box

Campaign Simulations

In order to estimate how successful our campaign might be, we conducted a simulation using the MODIS aqua and terra cloud data.

  1. Calculate mean cloud cover for each flight box for each day in the temporal window of interest (October-December, 2000 to present).

  2. Rank flight boxes in descending order of cloud cover.

  3. For each day in time time series:

  1. If there are at least 60 days in which to sampling, continue. Else, skip.
  2. Sample the highest ranking (i.e. hardest to sample) site that is below 5% cloud cover (if any) and hasn’t been sampled.
  3. Repeat b until all sites have been sampled or no more time remains
  #code underlying simulations available in the file "R/flight_window.R"

   
      #readRDS("data/temp/sim_output_10pct.RDS")%>%
      readRDS("data/temp/sim_output_05pct.RDS")%>%
        #readRDS("data/temp/sim_output_01pct.RDS")%>%  
        na.omit()%>%
        group_by(start_date)%>%
        summarise(sites_done = sum(!is.na(box_id)),
                  mean_cc = mean(na.omit(cloud_cover)),
                  days_taken = max(date)-min(start_date)+1)%>%
        ungroup()%>%
        mutate(day_of_year = yday(as_date(start_date)),
               year = year(as_date(start_date)))%>%
        group_by(day_of_year)%>%
        summarise(q0 = quantile(days_taken,probs=0),
                  q25 = quantile(days_taken,probs=0.05),
                  q50 = quantile(days_taken,probs=0.5),
                  q75 = quantile(days_taken,probs=0.95),
                  q1 = quantile(days_taken,probs=1)
        ) %>%
        mutate(date = as.Date(day_of_year,origin="2022-12-31"))%>%
        mutate(start_of_month = floor_date(date,unit = "month"),
               month_label = month(start_of_month,label = TRUE),
               julian_label = yday(start_of_month),
               day_of_month = mday(date),
               md_label = paste(month_label,"-",day_of_month,sep = ""))->test
      
      test %>%    
        ggplot()+
        geom_ribbon(aes(ymin=q0,ymax=q1, x = day_of_year),col="grey",alpha=0.2)+
        geom_ribbon(aes(ymin=q25,ymax=q75, x = day_of_year),col="grey",alpha=0.5)+
        geom_line(aes(x=day_of_year,y=q50))+
        geom_hline(yintercept = 37,lty=2)+
        scale_x_continuous(breaks = test$day_of_year,
                           labels = test$md_label)+ #note: it seems like it should be possible to inherit these
        ylab("Flight Days Needed")+
        xlab("Campaign Starting Day")+
        geom_text(label = "estimated total number of flight days",
                  y=37.3,
                  x=280)+
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) 
Figure 6. Estimated length of time needed to finish campaign. The dark grey area represents the 50% CI, the light gray the  90% CI, and the solid line represents the median.

Figure 6. Estimated length of time needed to finish campaign. The dark grey area represents the 50% CI, the light gray the 90% CI, and the solid line represents the median.